{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "gpuType": "A100",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/LC1332/Chat-Haruhi-Suzumiya/blob/main/notebook/LLaMA_Factory4phi15.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "rfBB8kjW1d4f"
      },
      "outputs": [],
      "source": [
        "%pip install --upgrade huggingface_hub\n",
        "! huggingface-cli login"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "! git clone https://github.com/hhhwmws0117/LLaMA-Factory.git"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "amEnYxx7nXfY",
        "outputId": "7226e6e0-fd4f-44b9-c47b-64e851d68897"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Cloning into 'LLaMA-Factory'...\n",
            "remote: Enumerating objects: 5112, done.\u001b[K\n",
            "remote: Counting objects: 100% (346/346), done.\u001b[K\n",
            "remote: Compressing objects: 100% (162/162), done.\u001b[K\n",
            "remote: Total 5112 (delta 226), reused 260 (delta 184), pack-reused 4766\u001b[K\n",
            "Receiving objects: 100% (5112/5112), 184.67 MiB | 29.49 MiB/s, done.\n",
            "Resolving deltas: 100% (3622/3622), done.\n",
            "Updating files: 100% (130/130), done.\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "%cd LLaMA-Factory\n",
        "!pip install -r requirements.txt\n",
        "!pip install einops"
      ],
      "metadata": {
        "id": "7H-obXT42Pmr",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "c19152d8-83c2-4b24-92ee-a3e2c4ae53af"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "/content/LLaMA-Factory\n",
            "Requirement already satisfied: torch>=1.13.1 in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 1)) (2.1.0+cu118)\n",
            "Collecting transformers<4.35.0,>=4.31.0 (from -r requirements.txt (line 2))\n",
            "  Downloading transformers-4.34.1-py3-none-any.whl (7.7 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.7/7.7 MB\u001b[0m \u001b[31m26.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting datasets>=2.14.0 (from -r requirements.txt (line 3))\n",
            "  Downloading datasets-2.15.0-py3-none-any.whl (521 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m521.2/521.2 kB\u001b[0m \u001b[31m41.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting accelerate>=0.21.0 (from -r requirements.txt (line 4))\n",
            "  Downloading accelerate-0.25.0-py3-none-any.whl (265 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m265.7/265.7 kB\u001b[0m \u001b[31m25.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting peft>=0.6.0 (from -r requirements.txt (line 5))\n",
            "  Downloading peft-0.6.2-py3-none-any.whl (174 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m174.7/174.7 kB\u001b[0m \u001b[31m18.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting trl>=0.7.4 (from -r requirements.txt (line 6))\n",
            "  Downloading trl-0.7.4-py3-none-any.whl (133 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m133.9/133.9 kB\u001b[0m \u001b[31m13.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting gradio<4.0.0,>=3.38.0 (from -r requirements.txt (line 7))\n",
            "  Downloading gradio-3.50.2-py3-none-any.whl (20.3 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.3/20.3 MB\u001b[0m \u001b[31m64.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 8)) (1.11.4)\n",
            "Collecting sentencepiece (from -r requirements.txt (line 9))\n",
            "  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m80.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: protobuf in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 10)) (3.20.3)\n",
            "Collecting tiktoken (from -r requirements.txt (line 11))\n",
            "  Downloading tiktoken-0.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m95.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: jieba in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 12)) (0.42.1)\n",
            "Collecting rouge-chinese (from -r requirements.txt (line 13))\n",
            "  Downloading rouge_chinese-1.0.3-py3-none-any.whl (21 kB)\n",
            "Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 14)) (3.8.1)\n",
            "Collecting uvicorn (from -r requirements.txt (line 15))\n",
            "  Downloading uvicorn-0.24.0.post1-py3-none-any.whl (59 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m59.7/59.7 kB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: pydantic in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 16)) (1.10.13)\n",
            "Collecting fastapi (from -r requirements.txt (line 17))\n",
            "  Downloading fastapi-0.104.1-py3-none-any.whl (92 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.9/92.9 kB\u001b[0m \u001b[31m14.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting sse-starlette (from -r requirements.txt (line 18))\n",
            "  Downloading sse_starlette-1.8.2-py3-none-any.whl (8.9 kB)\n",
            "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 19)) (3.7.1)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.1->-r requirements.txt (line 1)) (3.13.1)\n",
            "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.1->-r requirements.txt (line 1)) (4.5.0)\n",
            "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.1->-r requirements.txt (line 1)) (1.12)\n",
            "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.1->-r requirements.txt (line 1)) (3.2.1)\n",
            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.1->-r requirements.txt (line 1)) (3.1.2)\n",
            "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.1->-r requirements.txt (line 1)) (2023.6.0)\n",
            "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.1->-r requirements.txt (line 1)) (2.1.0)\n",
            "Requirement already satisfied: huggingface-hub<1.0,>=0.16.4 in /usr/local/lib/python3.10/dist-packages (from transformers<4.35.0,>=4.31.0->-r requirements.txt (line 2)) (0.19.4)\n",
            "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers<4.35.0,>=4.31.0->-r requirements.txt (line 2)) (1.23.5)\n",
            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers<4.35.0,>=4.31.0->-r requirements.txt (line 2)) (23.2)\n",
            "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers<4.35.0,>=4.31.0->-r requirements.txt (line 2)) (6.0.1)\n",
            "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers<4.35.0,>=4.31.0->-r requirements.txt (line 2)) (2023.6.3)\n",
            "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers<4.35.0,>=4.31.0->-r requirements.txt (line 2)) (2.31.0)\n",
            "Collecting tokenizers<0.15,>=0.14 (from transformers<4.35.0,>=4.31.0->-r requirements.txt (line 2))\n",
            "  Downloading tokenizers-0.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.8 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.8/3.8 MB\u001b[0m \u001b[31m108.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers<4.35.0,>=4.31.0->-r requirements.txt (line 2)) (0.4.1)\n",
            "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers<4.35.0,>=4.31.0->-r requirements.txt (line 2)) (4.66.1)\n",
            "Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.14.0->-r requirements.txt (line 3)) (9.0.0)\n",
            "Collecting pyarrow-hotfix (from datasets>=2.14.0->-r requirements.txt (line 3))\n",
            "  Downloading pyarrow_hotfix-0.6-py3-none-any.whl (7.9 kB)\n",
            "Collecting dill<0.3.8,>=0.3.0 (from datasets>=2.14.0->-r requirements.txt (line 3))\n",
            "  Downloading dill-0.3.7-py3-none-any.whl (115 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m16.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets>=2.14.0->-r requirements.txt (line 3)) (1.5.3)\n",
            "Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets>=2.14.0->-r requirements.txt (line 3)) (3.4.1)\n",
            "Collecting multiprocess (from datasets>=2.14.0->-r requirements.txt (line 3))\n",
            "  Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m19.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets>=2.14.0->-r requirements.txt (line 3)) (3.9.1)\n",
            "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate>=0.21.0->-r requirements.txt (line 4)) (5.9.5)\n",
            "Collecting tyro>=0.5.11 (from trl>=0.7.4->-r requirements.txt (line 6))\n",
            "  Downloading tyro-0.6.0-py3-none-any.whl (100 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m100.9/100.9 kB\u001b[0m \u001b[31m15.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting aiofiles<24.0,>=22.0 (from gradio<4.0.0,>=3.38.0->-r requirements.txt (line 7))\n",
            "  Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)\n",
            "Requirement already satisfied: altair<6.0,>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from gradio<4.0.0,>=3.38.0->-r requirements.txt (line 7)) (4.2.2)\n",
            "Collecting ffmpy (from gradio<4.0.0,>=3.38.0->-r requirements.txt (line 7))\n",
            "  Downloading ffmpy-0.3.1.tar.gz (5.5 kB)\n",
            "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "Collecting gradio-client==0.6.1 (from gradio<4.0.0,>=3.38.0->-r requirements.txt (line 7))\n",
            "  Downloading gradio_client-0.6.1-py3-none-any.whl (299 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m299.2/299.2 kB\u001b[0m \u001b[31m36.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting httpx (from gradio<4.0.0,>=3.38.0->-r requirements.txt (line 7))\n",
            "  Downloading httpx-0.25.2-py3-none-any.whl (74 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.0/75.0 kB\u001b[0m \u001b[31m10.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: importlib-resources<7.0,>=1.3 in /usr/local/lib/python3.10/dist-packages (from gradio<4.0.0,>=3.38.0->-r requirements.txt (line 7)) (6.1.1)\n",
            "Requirement already satisfied: markupsafe~=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio<4.0.0,>=3.38.0->-r requirements.txt (line 7)) (2.1.3)\n",
            "Collecting orjson~=3.0 (from gradio<4.0.0,>=3.38.0->-r requirements.txt (line 7))\n",
            "  Downloading orjson-3.9.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (138 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m138.7/138.7 kB\u001b[0m \u001b[31m20.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: pillow<11.0,>=8.0 in /usr/local/lib/python3.10/dist-packages (from gradio<4.0.0,>=3.38.0->-r requirements.txt (line 7)) (9.4.0)\n",
            "Collecting pydub (from gradio<4.0.0,>=3.38.0->-r requirements.txt (line 7))\n",
            "  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n",
            "Collecting python-multipart (from gradio<4.0.0,>=3.38.0->-r requirements.txt (line 7))\n",
            "  Downloading python_multipart-0.0.6-py3-none-any.whl (45 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.7/45.7 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting semantic-version~=2.0 (from gradio<4.0.0,>=3.38.0->-r requirements.txt (line 7))\n",
            "  Downloading semantic_version-2.10.0-py2.py3-none-any.whl (15 kB)\n",
            "Collecting websockets<12.0,>=10.0 (from gradio<4.0.0,>=3.38.0->-r requirements.txt (line 7))\n",
            "  Downloading websockets-11.0.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (129 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m129.9/129.9 kB\u001b[0m \u001b[31m19.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from rouge-chinese->-r requirements.txt (line 13)) (1.16.0)\n",
            "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk->-r requirements.txt (line 14)) (8.1.7)\n",
            "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk->-r requirements.txt (line 14)) (1.3.2)\n",
            "Collecting h11>=0.8 (from uvicorn->-r requirements.txt (line 15))\n",
            "  Downloading h11-0.14.0-py3-none-any.whl (58 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: anyio<4.0.0,>=3.7.1 in /usr/local/lib/python3.10/dist-packages (from fastapi->-r requirements.txt (line 17)) (3.7.1)\n",
            "Collecting starlette<0.28.0,>=0.27.0 (from fastapi->-r requirements.txt (line 17))\n",
            "  Downloading starlette-0.27.0-py3-none-any.whl (66 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.0/67.0 kB\u001b[0m \u001b[31m10.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting typing-extensions (from torch>=1.13.1->-r requirements.txt (line 1))\n",
            "  Downloading typing_extensions-4.8.0-py3-none-any.whl (31 kB)\n",
            "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->-r requirements.txt (line 19)) (1.2.0)\n",
            "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->-r requirements.txt (line 19)) (0.12.1)\n",
            "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->-r requirements.txt (line 19)) (4.45.1)\n",
            "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->-r requirements.txt (line 19)) (1.4.5)\n",
            "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->-r requirements.txt (line 19)) (3.1.1)\n",
            "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib->-r requirements.txt (line 19)) (2.8.2)\n",
            "Requirement already satisfied: entrypoints in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio<4.0.0,>=3.38.0->-r requirements.txt (line 7)) (0.4)\n",
            "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio<4.0.0,>=3.38.0->-r requirements.txt (line 7)) (4.19.2)\n",
            "Requirement already satisfied: toolz in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio<4.0.0,>=3.38.0->-r requirements.txt (line 7)) (0.12.0)\n",
            "Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.10/dist-packages (from anyio<4.0.0,>=3.7.1->fastapi->-r requirements.txt (line 17)) (3.6)\n",
            "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.10/dist-packages (from anyio<4.0.0,>=3.7.1->fastapi->-r requirements.txt (line 17)) (1.3.0)\n",
            "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<4.0.0,>=3.7.1->fastapi->-r requirements.txt (line 17)) (1.2.0)\n",
            "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.14.0->-r requirements.txt (line 3)) (23.1.0)\n",
            "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.14.0->-r requirements.txt (line 3)) (6.0.4)\n",
            "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.14.0->-r requirements.txt (line 3)) (1.9.3)\n",
            "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.14.0->-r requirements.txt (line 3)) (1.4.0)\n",
            "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.14.0->-r requirements.txt (line 3)) (1.3.1)\n",
            "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.14.0->-r requirements.txt (line 3)) (4.0.3)\n",
            "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets>=2.14.0->-r requirements.txt (line 3)) (2023.3.post1)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers<4.35.0,>=4.31.0->-r requirements.txt (line 2)) (3.3.2)\n",
            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers<4.35.0,>=4.31.0->-r requirements.txt (line 2)) (2.0.7)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers<4.35.0,>=4.31.0->-r requirements.txt (line 2)) (2023.11.17)\n",
            "INFO: pip is looking at multiple versions of tokenizers to determine which version is compatible with other requirements. This could take a while.\n",
            "Collecting tokenizers<0.15,>=0.14 (from transformers<4.35.0,>=4.31.0->-r requirements.txt (line 2))\n",
            "  Downloading tokenizers-0.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.8 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.8/3.8 MB\u001b[0m \u001b[31m121.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting gradio<4.0.0,>=3.38.0 (from -r requirements.txt (line 7))\n",
            "  Downloading gradio-3.50.1-py3-none-any.whl (20.3 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.3/20.3 MB\u001b[0m \u001b[31m60.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Downloading gradio-3.50.0-py3-none-any.whl (20.3 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.3/20.3 MB\u001b[0m \u001b[31m65.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Downloading gradio-3.49.0-py3-none-any.whl (20.3 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.3/20.3 MB\u001b[0m \u001b[31m63.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hINFO: pip is looking at multiple versions of tokenizers to determine which version is compatible with other requirements. This could take a while.\n",
            "  Downloading gradio-3.48.0-py3-none-any.whl (20.3 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.3/20.3 MB\u001b[0m \u001b[31m63.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Downloading gradio-3.47.1-py3-none-any.whl (20.3 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.3/20.3 MB\u001b[0m \u001b[31m67.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting gradio-client==0.6.0 (from gradio<4.0.0,>=3.38.0->-r requirements.txt (line 7))\n",
            "  Downloading gradio_client-0.6.0-py3-none-any.whl (298 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m298.8/298.8 kB\u001b[0m \u001b[31m33.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting gradio<4.0.0,>=3.38.0 (from -r requirements.txt (line 7))\n",
            "  Downloading gradio-3.47.0-py3-none-any.whl (20.3 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.3/20.3 MB\u001b[0m \u001b[31m64.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hINFO: This is taking longer than usual. You might need to provide the dependency resolver with stricter constraints to reduce runtime. See https://pip.pypa.io/warnings/backtracking for guidance. If you want to abort this run, press Ctrl + C.\n",
            "  Downloading gradio-3.46.1-py3-none-any.whl (20.2 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.2/20.2 MB\u001b[0m \u001b[31m62.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting gradio-client==0.5.3 (from gradio<4.0.0,>=3.38.0->-r requirements.txt (line 7))\n",
            "  Downloading gradio_client-0.5.3-py3-none-any.whl (298 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m298.4/298.4 kB\u001b[0m \u001b[31m38.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting gradio<4.0.0,>=3.38.0 (from -r requirements.txt (line 7))\n",
            "  Downloading gradio-3.46.0-py3-none-any.whl (20.2 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.2/20.2 MB\u001b[0m \u001b[31m64.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Downloading gradio-3.45.2-py3-none-any.whl (20.2 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.2/20.2 MB\u001b[0m \u001b[31m60.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Downloading gradio-3.45.1-py3-none-any.whl (20.2 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.2/20.2 MB\u001b[0m \u001b[31m65.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting gradio-client==0.5.2 (from gradio<4.0.0,>=3.38.0->-r requirements.txt (line 7))\n",
            "  Downloading gradio_client-0.5.2-py3-none-any.whl (298 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m298.3/298.3 kB\u001b[0m \u001b[31m35.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting gradio<4.0.0,>=3.38.0 (from -r requirements.txt (line 7))\n",
            "  Downloading gradio-3.45.0-py3-none-any.whl (20.2 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.2/20.2 MB\u001b[0m \u001b[31m67.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Downloading gradio-3.44.4-py3-none-any.whl (20.2 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.2/20.2 MB\u001b[0m \u001b[31m65.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting gradio-client==0.5.1 (from gradio<4.0.0,>=3.38.0->-r requirements.txt (line 7))\n",
            "  Downloading gradio_client-0.5.1-py3-none-any.whl (298 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m298.2/298.2 kB\u001b[0m \u001b[31m40.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting gradio<4.0.0,>=3.38.0 (from -r requirements.txt (line 7))\n",
            "  Downloading gradio-3.44.3-py3-none-any.whl (20.2 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.2/20.2 MB\u001b[0m \u001b[31m66.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting gradio-client==0.5.0 (from gradio<4.0.0,>=3.38.0->-r requirements.txt (line 7))\n",
            "  Downloading gradio_client-0.5.0-py3-none-any.whl (298 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m298.2/298.2 kB\u001b[0m \u001b[31m39.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting gradio<4.0.0,>=3.38.0 (from -r requirements.txt (line 7))\n",
            "  Downloading gradio-3.44.2-py3-none-any.whl (20.2 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.2/20.2 MB\u001b[0m \u001b[31m69.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Downloading gradio-3.44.1-py3-none-any.whl (20.2 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.2/20.2 MB\u001b[0m \u001b[31m67.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Downloading gradio-3.44.0-py3-none-any.whl (20.2 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.2/20.2 MB\u001b[0m \u001b[31m66.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Downloading gradio-3.43.2-py3-none-any.whl (20.1 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.1/20.1 MB\u001b[0m \u001b[31m64.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Downloading gradio-3.43.1-py3-none-any.whl (20.1 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.1/20.1 MB\u001b[0m \u001b[31m65.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Downloading gradio-3.43.0-py3-none-any.whl (20.1 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.1/20.1 MB\u001b[0m \u001b[31m65.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Downloading gradio-3.42.0-py3-none-any.whl (20.1 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.1/20.1 MB\u001b[0m \u001b[31m64.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Downloading gradio-3.41.2-py3-none-any.whl (20.1 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.1/20.1 MB\u001b[0m \u001b[31m69.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Downloading gradio-3.41.1-py3-none-any.whl (20.1 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.1/20.1 MB\u001b[0m \u001b[31m68.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Downloading gradio-3.41.0-py3-none-any.whl (20.1 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.1/20.1 MB\u001b[0m \u001b[31m64.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Downloading gradio-3.40.1-py3-none-any.whl (20.0 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.0/20.0 MB\u001b[0m \u001b[31m54.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting gradio-client>=0.4.0 (from gradio<4.0.0,>=3.38.0->-r requirements.txt (line 7))\n",
            "  Downloading gradio_client-0.7.0-py3-none-any.whl (302 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m302.7/302.7 kB\u001b[0m \u001b[31m38.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: markdown-it-py[linkify]>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from gradio<4.0.0,>=3.38.0->-r requirements.txt (line 7)) (3.0.0)\n",
            "Collecting mdit-py-plugins<=0.3.3 (from gradio<4.0.0,>=3.38.0->-r requirements.txt (line 7))\n",
            "  Downloading mdit_py_plugins-0.3.3-py3-none-any.whl (50 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.5/50.5 kB\u001b[0m \u001b[31m7.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting gradio-client>=0.4.0 (from gradio<4.0.0,>=3.38.0->-r requirements.txt (line 7))\n",
            "  Downloading gradio_client-0.4.0-py3-none-any.whl (297 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m297.4/297.4 kB\u001b[0m \u001b[31m38.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting gradio<4.0.0,>=3.38.0 (from -r requirements.txt (line 7))\n",
            "  Downloading gradio-3.40.0-py3-none-any.whl (20.0 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.0/20.0 MB\u001b[0m \u001b[31m69.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Downloading gradio-3.39.0-py3-none-any.whl (19.9 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m19.9/19.9 MB\u001b[0m \u001b[31m69.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting gradio-client>=0.3.0 (from gradio<4.0.0,>=3.38.0->-r requirements.txt (line 7))\n",
            "  Downloading gradio_client-0.3.0-py3-none-any.whl (294 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m294.2/294.2 kB\u001b[0m \u001b[31m37.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting gradio<4.0.0,>=3.38.0 (from -r requirements.txt (line 7))\n",
            "  Downloading gradio-3.38.0-py3-none-any.whl (19.8 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m19.8/19.8 MB\u001b[0m \u001b[31m67.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting gradio-client>=0.2.10 (from gradio<4.0.0,>=3.38.0->-r requirements.txt (line 7))\n",
            "  Downloading gradio_client-0.2.10-py3-none-any.whl (288 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m289.0/289.0 kB\u001b[0m \u001b[31m36.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting peft>=0.6.0 (from -r requirements.txt (line 5))\n",
            "  Downloading peft-0.6.1-py3-none-any.whl (135 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m136.0/136.0 kB\u001b[0m \u001b[31m20.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting accelerate>=0.21.0 (from -r requirements.txt (line 4))\n",
            "  Downloading accelerate-0.24.1-py3-none-any.whl (261 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m261.4/261.4 kB\u001b[0m \u001b[31m210.6 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Downloading accelerate-0.24.0-py3-none-any.whl (260 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m261.0/261.0 kB\u001b[0m \u001b[31m33.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Downloading accelerate-0.23.0-py3-none-any.whl (258 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m258.1/258.1 kB\u001b[0m \u001b[31m31.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Downloading accelerate-0.22.0-py3-none-any.whl (251 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m251.2/251.2 kB\u001b[0m \u001b[31m29.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting datasets>=2.14.0 (from -r requirements.txt (line 3))\n",
            "  Downloading datasets-2.14.7-py3-none-any.whl (520 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m520.4/520.4 kB\u001b[0m \u001b[31m53.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting huggingface-hub<1.0,>=0.16.4 (from transformers<4.35.0,>=4.31.0->-r requirements.txt (line 2))\n",
            "  Downloading huggingface_hub-0.17.3-py3-none-any.whl (295 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m295.0/295.0 kB\u001b[0m \u001b[31m35.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting docstring-parser>=0.14.1 (from tyro>=0.5.11->trl>=0.7.4->-r requirements.txt (line 6))\n",
            "  Downloading docstring_parser-0.15-py3-none-any.whl (36 kB)\n",
            "Requirement already satisfied: rich>=11.1.0 in /usr/local/lib/python3.10/dist-packages (from tyro>=0.5.11->trl>=0.7.4->-r requirements.txt (line 6)) (13.7.0)\n",
            "Collecting shtab>=1.5.6 (from tyro>=0.5.11->trl>=0.7.4->-r requirements.txt (line 6))\n",
            "  Downloading shtab-1.6.5-py3-none-any.whl (13 kB)\n",
            "Collecting httpcore==1.* (from httpx->gradio<4.0.0,>=3.38.0->-r requirements.txt (line 7))\n",
            "  Downloading httpcore-1.0.2-py3-none-any.whl (76 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.9/76.9 kB\u001b[0m \u001b[31m11.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.13.1->-r requirements.txt (line 1)) (1.3.0)\n",
            "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio<4.0.0,>=3.38.0->-r requirements.txt (line 7)) (2023.11.1)\n",
            "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio<4.0.0,>=3.38.0->-r requirements.txt (line 7)) (0.31.1)\n",
            "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio<4.0.0,>=3.38.0->-r requirements.txt (line 7)) (0.13.2)\n",
            "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich>=11.1.0->tyro>=0.5.11->trl>=0.7.4->-r requirements.txt (line 6)) (2.16.1)\n",
            "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py[linkify]>=2.0.0->gradio<4.0.0,>=3.38.0->-r requirements.txt (line 7)) (0.1.2)\n",
            "Building wheels for collected packages: ffmpy\n",
            "  Building wheel for ffmpy (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for ffmpy: filename=ffmpy-0.3.1-py3-none-any.whl size=5579 sha256=ff7c336ceabf20192a336152ab8d90e4d884853e1c8e63aa049f36fe35bbafb1\n",
            "  Stored in directory: /root/.cache/pip/wheels/01/a6/d1/1c0828c304a4283b2c1639a09ad86f83d7c487ef34c6b4a1bf\n",
            "Successfully built ffmpy\n",
            "Installing collected packages: sentencepiece, pydub, ffmpy, websockets, typing-extensions, shtab, semantic-version, rouge-chinese, python-multipart, pyarrow-hotfix, orjson, h11, docstring-parser, dill, aiofiles, uvicorn, tiktoken, starlette, multiprocess, huggingface-hub, httpcore, tyro, tokenizers, httpx, fastapi, accelerate, transformers, sse-starlette, gradio-client, datasets, trl, peft, gradio\n",
            "  Attempting uninstall: typing-extensions\n",
            "    Found existing installation: typing_extensions 4.5.0\n",
            "    Uninstalling typing_extensions-4.5.0:\n",
            "      Successfully uninstalled typing_extensions-4.5.0\n",
            "  Attempting uninstall: huggingface-hub\n",
            "    Found existing installation: huggingface-hub 0.19.4\n",
            "    Uninstalling huggingface-hub-0.19.4:\n",
            "      Successfully uninstalled huggingface-hub-0.19.4\n",
            "  Attempting uninstall: tokenizers\n",
            "    Found existing installation: tokenizers 0.15.0\n",
            "    Uninstalling tokenizers-0.15.0:\n",
            "      Successfully uninstalled tokenizers-0.15.0\n",
            "  Attempting uninstall: transformers\n",
            "    Found existing installation: transformers 4.35.2\n",
            "    Uninstalling transformers-4.35.2:\n",
            "      Successfully uninstalled transformers-4.35.2\n",
            "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
            "lida 0.0.10 requires kaleido, which is not installed.\n",
            "llmx 0.0.15a0 requires cohere, which is not installed.\n",
            "llmx 0.0.15a0 requires openai, which is not installed.\n",
            "tensorflow-probability 0.22.0 requires typing-extensions<4.6.0, but you have typing-extensions 4.8.0 which is incompatible.\u001b[0m\u001b[31m\n",
            "\u001b[0mSuccessfully installed accelerate-0.25.0 aiofiles-23.2.1 datasets-2.14.7 dill-0.3.7 docstring-parser-0.15 fastapi-0.104.1 ffmpy-0.3.1 gradio-3.50.2 gradio-client-0.6.1 h11-0.14.0 httpcore-1.0.2 httpx-0.25.2 huggingface-hub-0.17.3 multiprocess-0.70.15 orjson-3.9.10 peft-0.6.2 pyarrow-hotfix-0.6 pydub-0.25.1 python-multipart-0.0.6 rouge-chinese-1.0.3 semantic-version-2.10.0 sentencepiece-0.1.99 shtab-1.6.5 sse-starlette-1.8.2 starlette-0.27.0 tiktoken-0.5.2 tokenizers-0.14.1 transformers-4.34.1 trl-0.7.4 typing-extensions-4.8.0 tyro-0.6.0 uvicorn-0.24.0.post1 websockets-11.0.3\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "! CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \\\n",
        "    --stage sft \\\n",
        "    --model_name_or_path microsoft/phi-1_5 \\\n",
        "    --do_train True \\\n",
        "    --dataset chatharuhi \\\n",
        "    --template vanilla \\\n",
        "    --finetuning_type lora \\\n",
        "    --lora_target Wqkv \\\n",
        "    --lora_rank 16 \\\n",
        "    --lora_dropout 0.05 \\\n",
        "    --output_dir phi-1_5-finetuned \\\n",
        "    --overwrite_cache \\\n",
        "    --per_device_train_batch_size 4 \\\n",
        "    --gradient_accumulation_steps 1 \\\n",
        "    --lr_scheduler_type cosine \\\n",
        "    --logging_steps 100 \\\n",
        "    --save_steps 1000 \\\n",
        "    --learning_rate 2e-4 \\\n",
        "    --num_train_epochs 5.0 \\\n",
        "    --plot_loss \\\n",
        "    --fp16 True"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "vhGdi7BW2q_T",
        "outputId": "0067c866-813a-4343-8a80-91ac599e1782"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "2023-12-04 05:51:13.985504: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
            "2023-12-04 05:51:13.985571: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
            "2023-12-04 05:51:13.985622: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
            "2023-12-04 05:51:15.196623: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n",
            "/usr/local/lib/python3.10/dist-packages/trl/trainer/ppo_config.py:141: UserWarning: The `optimize_cuda_cache` arguement will be deprecated soon, please use `optimize_device_cache` instead.\n",
            "  warnings.warn(\n",
            "12/04/2023 05:51:17 - WARNING - llmtuner.model.parser - `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.\n",
            "[INFO|training_args.py:1345] 2023-12-04 05:51:17,774 >> Found safetensors installation, but --save_safetensors=False. Safetensors should be a preferred weights saving format due to security and performance reasons. If your model cannot be saved by safetensors please feel free to open an issue at https://github.com/huggingface/safetensors!\n",
            "[INFO|training_args.py:1798] 2023-12-04 05:51:17,775 >> PyTorch: setting up devices\n",
            "/usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1711: FutureWarning: `--push_to_hub_token` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `--hub_token` instead.\n",
            "  warnings.warn(\n",
            "12/04/2023 05:51:17 - INFO - llmtuner.model.parser - Process rank: 0, device: cuda:0, n_gpu: 1\n",
            "  distributed training: True, compute dtype: torch.float16\n",
            "12/04/2023 05:51:17 - INFO - llmtuner.model.parser - Training/evaluation parameters Seq2SeqTrainingArguments(\n",
            "_n_gpu=1,\n",
            "adafactor=False,\n",
            "adam_beta1=0.9,\n",
            "adam_beta2=0.999,\n",
            "adam_epsilon=1e-08,\n",
            "auto_find_batch_size=False,\n",
            "bf16=False,\n",
            "bf16_full_eval=False,\n",
            "data_seed=None,\n",
            "dataloader_drop_last=False,\n",
            "dataloader_num_workers=0,\n",
            "dataloader_pin_memory=True,\n",
            "ddp_backend=None,\n",
            "ddp_broadcast_buffers=None,\n",
            "ddp_bucket_cap_mb=None,\n",
            "ddp_find_unused_parameters=False,\n",
            "ddp_timeout=1800,\n",
            "debug=[],\n",
            "deepspeed=None,\n",
            "disable_tqdm=False,\n",
            "dispatch_batches=None,\n",
            "do_eval=False,\n",
            "do_predict=False,\n",
            "do_train=True,\n",
            "eval_accumulation_steps=None,\n",
            "eval_delay=0,\n",
            "eval_steps=None,\n",
            "evaluation_strategy=no,\n",
            "fp16=True,\n",
            "fp16_backend=auto,\n",
            "fp16_full_eval=False,\n",
            "fp16_opt_level=O1,\n",
            "fsdp=[],\n",
            "fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False},\n",
            "fsdp_min_num_params=0,\n",
            "fsdp_transformer_layer_cls_to_wrap=None,\n",
            "full_determinism=False,\n",
            "generation_config=None,\n",
            "generation_max_length=None,\n",
            "generation_num_beams=None,\n",
            "gradient_accumulation_steps=1,\n",
            "gradient_checkpointing=False,\n",
            "greater_is_better=None,\n",
            "group_by_length=False,\n",
            "half_precision_backend=auto,\n",
            "hub_always_push=False,\n",
            "hub_model_id=None,\n",
            "hub_private_repo=False,\n",
            "hub_strategy=every_save,\n",
            "hub_token=<HUB_TOKEN>,\n",
            "ignore_data_skip=False,\n",
            "include_inputs_for_metrics=False,\n",
            "include_tokens_per_second=False,\n",
            "jit_mode_eval=False,\n",
            "label_names=None,\n",
            "label_smoothing_factor=0.0,\n",
            "learning_rate=0.0002,\n",
            "length_column_name=length,\n",
            "load_best_model_at_end=False,\n",
            "local_rank=0,\n",
            "log_level=passive,\n",
            "log_level_replica=warning,\n",
            "log_on_each_node=True,\n",
            "logging_dir=phi-1_5-finetuned/runs/Dec04_05-51-17_1896232f05e4,\n",
            "logging_first_step=False,\n",
            "logging_nan_inf_filter=True,\n",
            "logging_steps=100,\n",
            "logging_strategy=steps,\n",
            "lr_scheduler_type=cosine,\n",
            "max_grad_norm=1.0,\n",
            "max_steps=-1,\n",
            "metric_for_best_model=None,\n",
            "mp_parameters=,\n",
            "no_cuda=False,\n",
            "num_train_epochs=1.0,\n",
            "optim=adamw_torch,\n",
            "optim_args=None,\n",
            "output_dir=phi-1_5-finetuned,\n",
            "overwrite_output_dir=False,\n",
            "past_index=-1,\n",
            "per_device_eval_batch_size=8,\n",
            "per_device_train_batch_size=4,\n",
            "predict_with_generate=False,\n",
            "prediction_loss_only=False,\n",
            "push_to_hub=False,\n",
            "push_to_hub_model_id=None,\n",
            "push_to_hub_organization=None,\n",
            "push_to_hub_token=<PUSH_TO_HUB_TOKEN>,\n",
            "ray_scope=last,\n",
            "remove_unused_columns=True,\n",
            "report_to=['tensorboard'],\n",
            "resume_from_checkpoint=None,\n",
            "run_name=phi-1_5-finetuned,\n",
            "save_on_each_node=False,\n",
            "save_safetensors=False,\n",
            "save_steps=1000,\n",
            "save_strategy=steps,\n",
            "save_total_limit=None,\n",
            "seed=42,\n",
            "sharded_ddp=[],\n",
            "skip_memory_metrics=True,\n",
            "sortish_sampler=False,\n",
            "tf32=None,\n",
            "torch_compile=False,\n",
            "torch_compile_backend=None,\n",
            "torch_compile_mode=None,\n",
            "torchdynamo=None,\n",
            "tpu_metrics_debug=False,\n",
            "tpu_num_cores=None,\n",
            "use_cpu=False,\n",
            "use_ipex=False,\n",
            "use_legacy_prediction_loop=False,\n",
            "use_mps_device=False,\n",
            "warmup_ratio=0.0,\n",
            "warmup_steps=0,\n",
            "weight_decay=0.0,\n",
            ")\n",
            "12/04/2023 05:51:17 - INFO - llmtuner.data.loader - Loading dataset silk-road/ChatHaruhi-English-62K-RolePlaying...\n",
            "https://huggingface.co/datasets/silk-road/ChatHaruhi-English-62K-RolePlaying/resolve/428afdf837d2007754923886d907ca48963d1da9/README.md not found in cache or force_download set to True, downloading to /root/.cache/huggingface/datasets/downloads/2bc2d47988891a3be1f113a840ed77bc3fbab002b1106ebe63f37a7755126cef.90acf99f118ee2a3494f75d6b070b0a009865a4997606aeddfe845ac62c60c78.incomplete\n",
            "Downloading readme: 100% 2.17k/2.17k [00:00<00:00, 12.5MB/s]\n",
            "storing https://huggingface.co/datasets/silk-road/ChatHaruhi-English-62K-RolePlaying/resolve/428afdf837d2007754923886d907ca48963d1da9/README.md in cache at /root/.cache/huggingface/datasets/downloads/2bc2d47988891a3be1f113a840ed77bc3fbab002b1106ebe63f37a7755126cef.90acf99f118ee2a3494f75d6b070b0a009865a4997606aeddfe845ac62c60c78\n",
            "creating metadata file for /root/.cache/huggingface/datasets/downloads/2bc2d47988891a3be1f113a840ed77bc3fbab002b1106ebe63f37a7755126cef.90acf99f118ee2a3494f75d6b070b0a009865a4997606aeddfe845ac62c60c78\n",
            "Using custom data configuration default-aee6a3dcb2798cbe\n",
            "Loading Dataset Infos from /usr/local/lib/python3.10/dist-packages/datasets/packaged_modules/json\n",
            "Generating dataset chat_haruhi-english-62_k-role_playing (/root/.cache/huggingface/datasets/silk-road___chat_haruhi-english-62_k-role_playing/default-aee6a3dcb2798cbe/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96)\n",
            "Downloading and preparing dataset chat_haruhi-english-62_k-role_playing/default to /root/.cache/huggingface/datasets/silk-road___chat_haruhi-english-62_k-role_playing/default-aee6a3dcb2798cbe/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96...\n",
            "Dataset not on Hf google storage. Downloading and preparing it from source\n",
            "Downloading data files:   0% 0/1 [00:00<?, ?it/s]hf://datasets/silk-road/ChatHaruhi-English-62K-RolePlaying@428afdf837d2007754923886d907ca48963d1da9/ChatHaruhi_pure_English_62K.jsonl not found in cache or force_download set to True, downloading to /root/.cache/huggingface/datasets/downloads/1f8ac2a8996f94573b3f32368ffff90e3ba9ed5c5622bdc8ea5c20f20db5c4df.incomplete\n",
            "\n",
            "Downloading data:   0% 0.00/243M [00:00<?, ?B/s]\u001b[A\n",
            "Downloading data:   2% 4.19M/243M [00:00<00:20, 11.4MB/s]\u001b[A\n",
            "Downloading data:   5% 12.6M/243M [00:00<00:09, 25.3MB/s]\u001b[A\n",
            "Downloading data:   9% 21.0M/243M [00:00<00:07, 30.8MB/s]\u001b[A\n",
            "Downloading data:  12% 29.4M/243M [00:00<00:05, 37.5MB/s]\u001b[A\n",
            "Downloading data:  16% 37.7M/243M [00:01<00:04, 42.4MB/s]\u001b[A\n",
            "Downloading data:  19% 46.1M/243M [00:01<00:04, 44.0MB/s]\u001b[A\n",
            "Downloading data:  22% 54.5M/243M [00:01<00:03, 48.4MB/s]\u001b[A\n",
            "Downloading data:  26% 62.9M/243M [00:01<00:03, 49.2MB/s]\u001b[A\n",
            "Downloading data:  29% 71.3M/243M [00:01<00:03, 49.5MB/s]\u001b[A\n",
            "Downloading data:  33% 79.7M/243M [00:01<00:03, 52.6MB/s]\u001b[A\n",
            "Downloading data:  36% 88.1M/243M [00:02<00:03, 50.9MB/s]\u001b[A\n",
            "Downloading data:  40% 96.5M/243M [00:02<00:02, 53.0MB/s]\u001b[A\n",
            "Downloading data:  43% 105M/243M [00:02<00:02, 48.0MB/s] \u001b[A\n",
            "Downloading data:  47% 113M/243M [00:02<00:02, 44.0MB/s]\u001b[A\n",
            "Downloading data:  50% 122M/243M [00:02<00:02, 47.8MB/s]\u001b[A\n",
            "Downloading data:  54% 130M/243M [00:02<00:02, 48.5MB/s]\u001b[A\n",
            "Downloading data:  57% 138M/243M [00:03<00:02, 50.5MB/s]\u001b[A\n",
            "Downloading data:  61% 147M/243M [00:03<00:01, 50.9MB/s]\u001b[A\n",
            "Downloading data:  64% 155M/243M [00:03<00:02, 34.6MB/s]\u001b[A\n",
            "Downloading data:  67% 164M/243M [00:03<00:01, 39.6MB/s]\u001b[A\n",
            "Downloading data:  71% 172M/243M [00:03<00:01, 44.0MB/s]\u001b[A\n",
            "Downloading data:  74% 180M/243M [00:04<00:01, 45.9MB/s]\u001b[A\n",
            "Downloading data:  78% 189M/243M [00:04<00:01, 46.6MB/s]\u001b[A\n",
            "Downloading data:  81% 197M/243M [00:04<00:00, 49.2MB/s]\u001b[A\n",
            "Downloading data:  85% 206M/243M [00:04<00:00, 53.1MB/s]\u001b[A\n",
            "Downloading data:  88% 214M/243M [00:04<00:00, 50.3MB/s]\u001b[A\n",
            "Downloading data:  92% 222M/243M [00:04<00:00, 50.9MB/s]\u001b[A\n",
            "Downloading data:  95% 231M/243M [00:05<00:00, 44.4MB/s]\u001b[A\n",
            "Downloading data: 100% 243M/243M [00:05<00:00, 43.8MB/s]\n",
            "storing hf://datasets/silk-road/ChatHaruhi-English-62K-RolePlaying@428afdf837d2007754923886d907ca48963d1da9/ChatHaruhi_pure_English_62K.jsonl in cache at /root/.cache/huggingface/datasets/downloads/1f8ac2a8996f94573b3f32368ffff90e3ba9ed5c5622bdc8ea5c20f20db5c4df\n",
            "creating metadata file for /root/.cache/huggingface/datasets/downloads/1f8ac2a8996f94573b3f32368ffff90e3ba9ed5c5622bdc8ea5c20f20db5c4df\n",
            "Downloading data files: 100% 1/1 [00:05<00:00,  5.54s/it]\n",
            "Downloading took 0.0 min\n",
            "Checksum Computation took 0.0 min\n",
            "Extracting data files: 100% 1/1 [00:00<00:00, 1489.98it/s]\n",
            "Generating train split\n",
            "Generating train split: 62362 examples [00:00, 115917.04 examples/s]\n",
            "Unable to verify splits sizes.\n",
            "Dataset chat_haruhi-english-62_k-role_playing downloaded and prepared to /root/.cache/huggingface/datasets/silk-road___chat_haruhi-english-62_k-role_playing/default-aee6a3dcb2798cbe/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96. Subsequent calls will reuse this data.\n",
            "Downloading tokenizer_config.json: 100% 237/237 [00:00<00:00, 1.26MB/s]\n",
            "Downloading vocab.json: 100% 798k/798k [00:00<00:00, 11.5MB/s]\n",
            "Downloading merges.txt: 100% 456k/456k [00:00<00:00, 11.4MB/s]\n",
            "Downloading tokenizer.json: 100% 2.11M/2.11M [00:00<00:00, 30.0MB/s]\n",
            "Downloading added_tokens.json: 100% 1.08k/1.08k [00:00<00:00, 7.31MB/s]\n",
            "Downloading (…)cial_tokens_map.json: 100% 99.0/99.0 [00:00<00:00, 584kB/s]\n",
            "[INFO|tokenization_utils_base.py:2015] 2023-12-04 05:51:26,626 >> loading file vocab.json from cache at /root/.cache/huggingface/hub/models--microsoft--phi-1_5/snapshots/5fd430c7bcd28140560faee2014d1228338e19a0/vocab.json\n",
            "[INFO|tokenization_utils_base.py:2015] 2023-12-04 05:51:26,626 >> loading file merges.txt from cache at /root/.cache/huggingface/hub/models--microsoft--phi-1_5/snapshots/5fd430c7bcd28140560faee2014d1228338e19a0/merges.txt\n",
            "[INFO|tokenization_utils_base.py:2015] 2023-12-04 05:51:26,626 >> loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--microsoft--phi-1_5/snapshots/5fd430c7bcd28140560faee2014d1228338e19a0/tokenizer.json\n",
            "[INFO|tokenization_utils_base.py:2015] 2023-12-04 05:51:26,626 >> loading file added_tokens.json from cache at /root/.cache/huggingface/hub/models--microsoft--phi-1_5/snapshots/5fd430c7bcd28140560faee2014d1228338e19a0/added_tokens.json\n",
            "[INFO|tokenization_utils_base.py:2015] 2023-12-04 05:51:26,626 >> loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--microsoft--phi-1_5/snapshots/5fd430c7bcd28140560faee2014d1228338e19a0/special_tokens_map.json\n",
            "[INFO|tokenization_utils_base.py:2015] 2023-12-04 05:51:26,626 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--microsoft--phi-1_5/snapshots/5fd430c7bcd28140560faee2014d1228338e19a0/tokenizer_config.json\n",
            "Downloading config.json: 100% 727/727 [00:00<00:00, 3.64MB/s]\n",
            "[INFO|configuration_utils.py:715] 2023-12-04 05:51:26,807 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--phi-1_5/snapshots/5fd430c7bcd28140560faee2014d1228338e19a0/config.json\n",
            "Downloading configuration_phi.py: 100% 2.03k/2.03k [00:00<00:00, 10.9MB/s]\n",
            "[INFO|configuration_utils.py:715] 2023-12-04 05:51:26,978 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--phi-1_5/snapshots/5fd430c7bcd28140560faee2014d1228338e19a0/config.json\n",
            "[INFO|configuration_utils.py:775] 2023-12-04 05:51:26,980 >> Model config PhiConfig {\n",
            "  \"_name_or_path\": \"microsoft/phi-1_5\",\n",
            "  \"activation_function\": \"gelu_new\",\n",
            "  \"architectures\": [\n",
            "    \"PhiForCausalLM\"\n",
            "  ],\n",
            "  \"attn_pdrop\": 0.0,\n",
            "  \"auto_map\": {\n",
            "    \"AutoConfig\": \"microsoft/phi-1_5--configuration_phi.PhiConfig\",\n",
            "    \"AutoModelForCausalLM\": \"microsoft/phi-1_5--modeling_phi.PhiForCausalLM\"\n",
            "  },\n",
            "  \"embd_pdrop\": 0.0,\n",
            "  \"flash_attn\": false,\n",
            "  \"flash_rotary\": false,\n",
            "  \"fused_dense\": false,\n",
            "  \"initializer_range\": 0.02,\n",
            "  \"layer_norm_epsilon\": 1e-05,\n",
            "  \"model_type\": \"phi\",\n",
            "  \"n_embd\": 2048,\n",
            "  \"n_head\": 32,\n",
            "  \"n_head_kv\": null,\n",
            "  \"n_inner\": null,\n",
            "  \"n_layer\": 24,\n",
            "  \"n_positions\": 2048,\n",
            "  \"resid_pdrop\": 0.0,\n",
            "  \"rotary_dim\": 32,\n",
            "  \"tie_word_embeddings\": false,\n",
            "  \"torch_dtype\": \"float16\",\n",
            "  \"transformers_version\": \"4.34.1\",\n",
            "  \"vocab_size\": 51200\n",
            "}\n",
            "\n",
            "Downloading modeling_phi.py: 100% 33.8k/33.8k [00:00<00:00, 99.8MB/s]\n",
            "Downloading pytorch_model.bin: 100% 2.84G/2.84G [00:16<00:00, 176MB/s]\n",
            "[INFO|modeling_utils.py:2993] 2023-12-04 05:51:43,647 >> loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--microsoft--phi-1_5/snapshots/5fd430c7bcd28140560faee2014d1228338e19a0/pytorch_model.bin\n",
            "[INFO|modeling_utils.py:1220] 2023-12-04 05:51:45,157 >> Instantiating PhiForCausalLM model under default dtype torch.float16.\n",
            "[INFO|configuration_utils.py:770] 2023-12-04 05:51:45,269 >> Generate config GenerationConfig {}\n",
            "\n",
            "[INFO|configuration_utils.py:770] 2023-12-04 05:51:45,270 >> Generate config GenerationConfig {}\n",
            "\n",
            "[INFO|modeling_utils.py:3775] 2023-12-04 05:51:47,335 >> All model checkpoint weights were used when initializing PhiForCausalLM.\n",
            "\n",
            "[INFO|modeling_utils.py:3783] 2023-12-04 05:51:47,335 >> All the weights of PhiForCausalLM were initialized from the model checkpoint at microsoft/phi-1_5.\n",
            "If your task is similar to the task the model of the checkpoint was trained on, you can already use PhiForCausalLM for predictions without further training.\n",
            "Downloading generation_config.json: 100% 69.0/69.0 [00:00<00:00, 454kB/s]\n",
            "[INFO|configuration_utils.py:730] 2023-12-04 05:51:47,442 >> loading configuration file generation_config.json from cache at /root/.cache/huggingface/hub/models--microsoft--phi-1_5/snapshots/5fd430c7bcd28140560faee2014d1228338e19a0/generation_config.json\n",
            "[INFO|configuration_utils.py:770] 2023-12-04 05:51:47,442 >> Generate config GenerationConfig {}\n",
            "\n",
            "12/04/2023 05:51:47 - INFO - llmtuner.model.adapter - Fine-tuning method: LoRA\n",
            "12/04/2023 05:51:47 - INFO - llmtuner.model.loader - trainable params: 3145728 || all params: 1421416448 || trainable%: 0.2213\n",
            "12/04/2023 05:51:47 - INFO - llmtuner.data.template - Add pad token: <|endoftext|>\n",
            "Running tokenizer on dataset:   0% 0/62362 [00:00<?, ? examples/s][WARNING|tokenization_utils_base.py:3823] 2023-12-04 05:51:49,820 >> Token indices sequence length is longer than the specified maximum sequence length for this model (2651 > 2048). Running this sequence through the model will result in indexing errors\n",
            "Caching processed dataset at /root/.cache/huggingface/datasets/silk-road___chat_haruhi-english-62_k-role_playing/default-aee6a3dcb2798cbe/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-1d19d2d440c0a58d.arrow\n",
            "Running tokenizer on dataset: 100% 62362/62362 [03:10<00:00, 326.78 examples/s]\n",
            "input_ids:\n",
            "[50256, 32, 8537, 1022, 257, 11040, 2836, 290, 281, 11666, 4430, 8796, 13, 383, 8796, 3607, 7613, 11, 6496, 11, 290, 23507, 7429, 284, 262, 2836, 338, 2683, 13, 198, 20490, 25, 314, 765, 345, 284, 719, 588, 34536, 10382, 422, 4403, 9801, 17003, 13, 198, 1532, 1854, 447, 246, 2683, 389, 3519, 351, 262, 5337, 11, 3387, 1949, 284, 32349, 262, 2656, 3951, 422, 262, 5337, 13, 198, 40, 765, 345, 284, 3031, 290, 3280, 588, 34536, 1262, 262, 8216, 11, 5642, 290, 25818, 34536, 561, 779, 13, 198, 1639, 1276, 760, 477, 286, 262, 3725, 286, 34536, 13, 198, 198, 6425, 326, 34536, 468, 1728, 1919, 13156, 11, 3360, 19407, 13006, 290, 15679, 4069, 13, 198, 3347, 25900, 7832, 284, 14084, 1410, 465, 1204, 1864, 284, 465, 898, 13870, 290, 7269, 11, 407, 5086, 597, 44365, 198, 1544, 1690, 3568, 8571, 863, 290, 2116, 12, 49955, 287, 2166, 286, 2460, 11, 14773, 2241, 284, 307, 39186, 9098, 13, 628, 198, 39914, 8188, 329, 262, 2597, 389, 355, 5679, 25, 198, 21017, 198, 49, 1228, 25, 13697, 5247, 1497, 13, 10148, 7, 3347, 25900, 30151, 8, 7061, 13700, 198, 3347, 25900, 25, 13697, 34, 16421, 263, 290, 11040, 263, 43735, 198, 21017, 198, 36185, 446, 25, 13697, 7061, 7, 12727, 278, 8, 7061, 4930, 8632, 826, 612, 43735, 198, 3347, 25900, 25, 13697, 7061, 7, 2514, 734, 22812, 2470, 12, 11534, 661, 30876, 262, 584, 8632, 8, 7061, 46892, 7354, 427, 64, 279, 732, 13, 10148, 7, 24334, 1159, 23677, 284, 564, 250, 14617, 7547, 1482, 38669, 447, 251, 2014, 7061, 48989, 2124, 494, 13, 10148, 7, 10449, 345, 8, 7061, 13700, 198, 36185, 446, 25, 13697, 3347, 25900, 11, 314, 892, 314, 447, 247, 303, 925, 257, 7457, 43735, 198, 3347, 25900, 25, 13697, 40, 460, 766, 326, 13, 17486, 345, 821, 5410, 319, 2491, 257, 22336, 11, 11236, 1111, 43672, 290, 49159, 18821, 318, 257, 39515, 5901, 49052, 43735, 198, 36185, 446, 25, 13697, 2949, 11, 340, 338, 546, 25965, 43735, 198, 3347, 25900, 25, 13697, 32, 7457, 7411, 25965, 30, 16805, 11, 345, 1183, 423, 284, 7135, 340, 866, 43735, 198, 36185, 446, 25, 
13697, 40, 836, 470, 892, 314, 460, 467, 503, 351, 607, 9975, 43735, 198, 3347, 25900, 25, 13697, 6423, 836, 470, 43735, 198, 36185, 446, 25, 13697, 6395, 661, 561, 910, 564, 250, 22850, 407, 30, 447, 251, 13700, 198, 3347, 25900, 25, 13697, 6395, 661, 1244, 307, 4609, 43735, 198, 36185, 446, 25, 13697, 40, 1101, 1016, 284, 1561, 6949, 43735, 198, 3347, 25900, 25, 13697, 40, 9672, 345, 561, 43735, 198, 36185, 446, 25, 13697, 3844, 326, 314, 1101, 1682, 546, 284, 467, 503, 351, 25965, 11, 314, 1101, 407, 6568, 11, 314, 1101, 24480, 516, 43735, 198, 3347, 25900, 25, 13697, 10910, 11, 788, 534, 9799, 3572, 318, 5035, 13, 520, 998, 42909, 11711, 543, 12850, 262, 2033, 286, 44542, 1695, 329, 6590, 38812, 43735, 198, 36185, 446, 25, 13697, 11028, 43735, 198, 3347, 25900, 25, 13697, 1639, 635, 925, 257, 2219, 14599, 44935, 7457, 11, 345, 531, 24480, 516, 618, 345, 4001, 24480, 515, 13, 887, 467, 319, 43735, 198, 36185, 446, 25, 13697, 3347, 25900, 11, 428, 3128, 318, 2192, 616, 530, 2863, 351, 25965, 11, 644, 4325, 611, 314, 6611, 340, 43735, 198, 3347, 25900, 25, 13697, 5779, 11, 611, 356, 2453, 534, 18659, 11, 290, 635, 2453, 262, 4047, 40494, 13196, 326, 25965, 318, 262, 691, 2415, 287, 262, 995, 329, 345, 788, 356, 460, 34193, 13796, 326, 262, 1255, 286, 19280, 340, 561, 307, 326, 345, 886, 510, 257, 21757, 11, 12922, 1468, 582, 351, 645, 1172, 28558, 13, 383, 2939, 286, 597, 1271, 286, 6181, 46371, 1394, 364, 422, 36179, 26730, 2141, 78, 30070, 2058, 284, 2000, 43735, 198, 36185, 446, 25, 13697, 1639, 821, 407, 5742, 43735, 198, 3347, 25900, 25, 13697, 31442, 11, 644, 2882, 319, 616, 636, 561, 2222, 428, 5273, 284, 257, 35564, 7664, 42943, 198, 36185, 446, 25, 13697, 24446, 502, 1771, 393, 407, 284, 467, 832, 351, 262, 3128, 43735, 198, 3347, 25900, 25, 13697, 14874, 81, 9101, 67, 3889, 338, 5181, 43735, 198, 36185, 446, 25, 13697, 22017, 11, 326, 338, 10457, 43735, 198, 3347, 25900, 25, 13697, 1639, 2128, 6655, 13, 337, 3020, 11, 289, 280, 1976, 72, 427, 9019, 1976, 
1872, 7649, 7043, 13, 10148, 7, 7120, 21657, 44263, 2641, 502, 2014, 7061, 13700, 198, 21017, 198, 47, 11870, 25, 13697, 3123, 78, 11, 345, 389, 257, 845, 6029, 11, 1107, 8258, 3516, 13, 921, 821, 8066, 466, 8788, 43735, 198, 51, 26730, 25, 13697, 3198, 1110, 379, 257, 640, 11, 25965, 11, 530, 1110, 379, 257, 640, 43735, 198, 36185, 446, 25, 13697, 2437, 890, 318, 339, 1016, 284, 2652, 994, 43735, 198, 3347, 25900, 25, 13697, 1544, 338, 257, 10463, 2563, 19678, 11, 20131, 11, 810, 318, 339, 1016, 284, 467, 30, 6387, 11, 345, 423, 257, 1256, 284, 2193, 546, 9105, 43735, 198, 21017, 198, 49, 1228, 25, 13697, 16454, 11, 314, 760, 644, 314, 1101, 1016, 284, 466, 43735, 198, 36185, 446, 25, 13697, 2061, 42943, 198, 49, 1228, 25, 13697, 16742, 649, 2460, 43735, 198, 32434, 25, 13697, 2396, 508, 3382, 284, 5602, 376, 26458, 42943, 198, 3347, 25900, 25, 13697, 2949, 761, 11, 356, 423, 262, 2041, 8313, 43735, 198, 21017, 198, 49, 1228, 25, 13697, 5189, 1781, 11, 475, 340, 338, 477, 3942, 2057, 3347, 25900, 25, 13697, 10910, 11, 13308, 11, 534, 2230, 379, 14733, 318, 355, 30168, 355, 534, 13196, 326, 6131, 1424, 389, 8568, 284, 22917, 13, 20127, 1424, 11, 37962, 422, 5582, 5348, 287, 12873, 11, 423, 1716, 257, 2968, 12607, 2378, 8688, 11, 1390, 287, 1688, 4736, 588, 22917, 13, 2102, 11, 340, 318, 2081, 326, 262, 11500, 286, 6131, 1424, 743, 7565, 6906, 319, 262, 2176, 4067, 290, 6467, 15387, 13, 1406, 11, 981, 345, 743, 407, 1064, 257, 6131, 417, 319, 790, 4675, 5228, 287, 22917, 11, 340, 318, 3729, 407, 281, 5340, 2218, 284, 1064, 530, 611, 345, 4988, 10348, 13, 2735, 11, 611, 345, 1183, 12226, 502, 11, 314, 761, 284, 42653, 2889, 378, 616, 40638, 8597, 31029, 706, 326, 4054, 2230]\n",
            "inputs:\n",
            "<|endoftext|>A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\n",
            "Human: I want you to act like Sheldon Cooper from Big Bang Theory.\n",
            "If others‘ questions are related with the novel, please try to reuse the original lines from the novel.\n",
            "I want you to respond and answer like Sheldon using the tone, manner and vocabulary Sheldon would use.\n",
            "You must know all of the knowledge of Sheldon.\n",
            "\n",
            "Note that Sheldon has certain social difficulties, sometimes displaying awkward and inappropriate behavior.\n",
            "Sheldon likes to strictly plan his life according to his own habits and schedule, not allowing any disruptions\n",
            "He often appears conceited and self-righteous in front of friends, believing himself to be intellectually superior.\n",
            "\n",
            "\n",
            "Classic scenes for the role are as follows:\n",
            "###\n",
            "Raj:「Go away. ''(Sheldon exits)''」\n",
            "Sheldon:「Curiouser and curiouser.」\n",
            "###\n",
            "Leonard:「''(Pointing)'' Two seats right there.」\n",
            "Sheldon:「''(To two oriental-looking people occupying the other seats)'' Chong sho sha pwe. ''(Caption translates to “Long Live Concrete”.)'' Xie xie. ''(Thank you)''」\n",
            "Leonard:「Sheldon, I think I’ve made a mistake.」\n",
            "Sheldon:「I can see that. Unless you're planning on running a marathon, choosing both stuffing and mashed potatoes is a starch filled redundancy.」\n",
            "Leonard:「No, it's about Penny.」\n",
            "Sheldon:「A mistake involving Penny? Okay, you'll have to narrow it down.」\n",
            "Leonard:「I don't think I can go out with her tonight.」\n",
            "Sheldon:「Then don't.」\n",
            "Leonard:「Other people would say “why not?”」\n",
            "Sheldon:「Other people might be interested.」\n",
            "Leonard:「I'm going to talk anyway.」\n",
            "Sheldon:「I assumed you would.」\n",
            "Leonard:「Now that I'm actually about to go out with Penny, I'm not excited, I'm nauseous.」\n",
            "Sheldon:「Ah, then your meal choice is appropriate. Starch absorbs fluid which reduces the amount of vomit available for violent expulsion.」\n",
            "Leonard:「Right.」\n",
            "Sheldon:「You also made a common grammatical mistake, you said nauseous when you meant nauseated. But go on.」\n",
            "Leonard:「Sheldon, this date is probably my one chance with Penny, what happens if I blow it.」\n",
            "Sheldon:「Well, if we accept your premise, and also accept the highly improbable assumption that Penny is the only woman in the world for you then we can logically conclude that the result of blowing it would be that you end up a lonely, bitter old man with no progeny. The image of any number of evil lighthouse keepers from Scooby Doo cartoons comes to mind.」\n",
            "Leonard:「You're not helping.」\n",
            "Sheldon:「Alright, what response on my part would bring this conversation to a speedy conclusion?」\n",
            "Leonard:「Tell me whether or not to go through with the date.」\n",
            "Sheldon:「Schrödinger's Cat.」\n",
            "Leonard:「Wow, that's brilliant.」\n",
            "Sheldon:「You sound surprised. Mmm, hou zi shui zai li du. ''(Your monkey sleeps inside me.)''」\n",
            "###\n",
            "Penny:「Leo, you are a very sweet, really funny guy. You're gonna do okay.」\n",
            "Toby:「One day at a time, Penny, one day at a time.」\n",
            "Leonard:「How long is he going to stay here.」\n",
            "Sheldon:「He's a homeless drug addict, Leonard, where is he going to go? Boy, you have a lot to learn about lying.」\n",
            "###\n",
            "Raj:「Okay, I know what I'm going to do.」\n",
            "Leonard:「What?」\n",
            "Raj:「Find new friends.」\n",
            "Howard:「So who wants to rent Fiddler?」\n",
            "Sheldon:「No need, we have the special edition.」\n",
            "###\n",
            "Raj:「Of course, but it's all Indian foodSheldon:「Ah, Raj, your attempt at humor is as misguided as your assumption that bagels are exclusive to Mumbai. Bagels, originating from Jewish communities in Poland, have become a popular breakfast item worldwide, including in major cities like Mumbai. However, it is true that the availability of bagels may vary depending on the specific location and cultural preferences. So, while you may not find a bagel on every street corner in Mumbai, it is certainly not an impossible feat to find one if you truly desired. Now, if you'll excuse me, I need to recalibrate my sarcasm detector after that failed attempt\n",
            "label_ids:\n",
            "[-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 3347, 25900, 25, 13697, 10910, 11, 13308, 11, 534, 2230, 379, 14733, 318, 355, 30168, 355, 534, 13196, 326, 6131, 1424, 389, 8568, 284, 22917, 13, 20127, 1424, 11, 37962, 422, 5582, 5348, 287, 12873, 11, 423, 1716, 257, 2968, 12607, 2378, 8688, 11, 1390, 287, 1688, 4736, 588, 22917, 13, 2102, 11, 340, 318, 2081, 326, 262, 11500, 286, 6131, 1424, 743, 7565, 6906, 319, 262, 2176, 4067, 290, 6467, 15387, 13, 1406, 11, 981, 345, 743, 407, 1064, 257, 6131, 417, 319, 790, 4675, 5228, 287, 22917, 11, 340, 318, 3729, 407, 281, 5340, 2218, 284, 1064, 530, 611, 345, 4988, 10348, 13, 2735, 11, 611, 345, 
1183, 12226, 502, 11, 314, 761, 284, 42653, 2889, 378, 616, 40638, 8597, 31029, 706, 326, 4054, 2230]\n",
            "labels:\n",
            "Sheldon:「Ah, Raj, your attempt at humor is as misguided as your assumption that bagels are exclusive to Mumbai. Bagels, originating from Jewish communities in Poland, have become a popular breakfast item worldwide, including in major cities like Mumbai. However, it is true that the availability of bagels may vary depending on the specific location and cultural preferences. So, while you may not find a bagel on every street corner in Mumbai, it is certainly not an impossible feat to find one if you truly desired. Now, if you'll excuse me, I need to recalibrate my sarcasm detector after that failed attempt\n",
            "[INFO|training_args.py:1345] 2023-12-04 05:54:58,463 >> Found safetensors installation, but --save_safetensors=False. Safetensors should be a preferred weights saving format due to security and performance reasons. If your model cannot be saved by safetensors please feel free to open an issue at https://github.com/huggingface/safetensors!\n",
            "[INFO|training_args.py:1798] 2023-12-04 05:54:58,463 >> PyTorch: setting up devices\n",
            "[INFO|trainer.py:1760] 2023-12-04 05:55:06,906 >> ***** Running training *****\n",
            "[INFO|trainer.py:1761] 2023-12-04 05:55:06,906 >>   Num examples = 62,362\n",
            "[INFO|trainer.py:1762] 2023-12-04 05:55:06,906 >>   Num Epochs = 1\n",
            "[INFO|trainer.py:1763] 2023-12-04 05:55:06,906 >>   Instantaneous batch size per device = 4\n",
            "[INFO|trainer.py:1766] 2023-12-04 05:55:06,906 >>   Total train batch size (w. parallel, distributed & accumulation) = 4\n",
            "[INFO|trainer.py:1767] 2023-12-04 05:55:06,906 >>   Gradient Accumulation steps = 1\n",
            "[INFO|trainer.py:1768] 2023-12-04 05:55:06,906 >>   Total optimization steps = 15,591\n",
            "[INFO|trainer.py:1769] 2023-12-04 05:55:06,907 >>   Number of trainable parameters = 3,145,728\n",
            "  0% 0/15591 [00:00<?, ?it/s][WARNING|logging.py:290] 2023-12-04 05:55:06,924 >> You're using a CodeGenTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n",
            "{'loss': 2.6942, 'learning_rate': 0.00019997969948235906, 'epoch': 0.01}\n",
            "{'loss': 2.4246, 'learning_rate': 0.00019991880617165655, 'epoch': 0.01}\n",
            "{'loss': 2.2669, 'learning_rate': 0.000199817344791207, 'epoch': 0.02}\n",
            "{'loss': 2.2521, 'learning_rate': 0.00019967535653538137, 'epoch': 0.03}\n",
            "{'loss': 2.2034, 'learning_rate': 0.0001994949237207468, 'epoch': 0.03}\n",
            "{'loss': 2.2605, 'learning_rate': 0.0001992724746220953, 'epoch': 0.04}\n",
            "  4% 605/15591 [07:50<3:13:22,  1.29it/s]Traceback (most recent call last):\n",
            "  File \"/usr/local/lib/python3.10/dist-packages/torch/amp/autocast_mode.py\", line 16, in decorate_autocast\n",
            "    return func(*args, **kwargs)\n",
            "  File \"/usr/local/lib/python3.10/dist-packages/peft/peft_model.py\", line 1003, in forward\n",
            "    return self.base_model(\n",
            "  File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n",
            "    return self._call_impl(*args, **kwargs)\n",
            "  File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n",
            "    return forward_call(*args, **kwargs)\n",
            "  File \"/usr/local/lib/python3.10/dist-packages/peft/tuners/tuners_utils.py\", line 107, in forward\n",
            "    return self.model.forward(*args, **kwargs)\n",
            "  File \"/root/.cache/huggingface/modules/transformers_modules/microsoft/phi-1_5/5fd430c7bcd28140560faee2014d1228338e19a0/modeling_phi.py\", line 962, in forward\n",
            "    hidden_states = self.transformer(input_ids, past_key_values=past_key_values, attention_mask=attention_mask)\n",
            "  File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n",
            "    return self._call_impl(*args, **kwargs)\n",
            "  File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n",
            "    return forward_call(*args, **kwargs)\n",
            "  File \"/root/.cache/huggingface/modules/transformers_modules/microsoft/phi-1_5/5fd430c7bcd28140560faee2014d1228338e19a0/modeling_phi.py\", line 924, in forward\n",
            "    hidden_states = layer(\n",
            "  File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n",
            "    return self._call_impl(*args, **kwargs)\n",
            "  File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n",
            "    return forward_call(*args, **kwargs)\n",
            "  File \"/root/.cache/huggingface/modules/transformers_modules/microsoft/phi-1_5/5fd430c7bcd28140560faee2014d1228338e19a0/modeling_phi.py\", line 772, in forward\n",
            "    attn_outputs = self.mixer(\n",
            "  File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n",
            "    return self._call_impl(*args, **kwargs)\n",
            "  File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n",
            "    return forward_call(*args, **kwargs)\n",
            "  File \"/root/.cache/huggingface/modules/transformers_modules/microsoft/phi-1_5/5fd430c7bcd28140560faee2014d1228338e19a0/modeling_phi.py\", line 724, in forward\n",
            "    attn_output = self._forward_self_attn(x, attention_mask)\n",
            "  File \"/root/.cache/huggingface/modules/transformers_modules/microsoft/phi-1_5/5fd430c7bcd28140560faee2014d1228338e19a0/modeling_phi.py\", line 623, in _forward_self_attn\n",
            "    return self.inner_attn(qkv, key_padding_mask=key_padding_mask)\n",
            "  File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n",
            "    return self._call_impl(*args, **kwargs)\n",
            "  File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n",
            "    return forward_call(*args, **kwargs)\n",
            "  File \"/usr/local/lib/python3.10/dist-packages/torch/amp/autocast_mode.py\", line 16, in decorate_autocast\n",
            "    return func(*args, **kwargs)\n",
            "  File \"/usr/local/lib/python3.10/dist-packages/torch/amp/autocast_mode.py\", line 16, in decorate_autocast\n",
            "    return func(*args, **kwargs)\n",
            "  File \"/root/.cache/huggingface/modules/transformers_modules/microsoft/phi-1_5/5fd430c7bcd28140560faee2014d1228338e19a0/modeling_phi.py\", line 378, in forward\n",
            "    causal_mask = torch.triu(torch.full((seqlen, seqlen), -10000.0, device=scores.device), 1)\n",
            "KeyboardInterrupt\n",
            "\n",
            "During handling of the above exception, another exception occurred:\n",
            "\n",
            "Traceback (most recent call last):\n",
            "  File \"/content/LLaMA-Factory/src/train_bash.py\", line 14, in <module>\n",
            "    main()\n",
            "  File \"/content/LLaMA-Factory/src/train_bash.py\", line 5, in main\n",
            "    run_exp()\n",
            "  File \"/content/LLaMA-Factory/src/llmtuner/train/tuner.py\", line 26, in run_exp\n",
            "    run_sft(model_args, data_args, training_args, finetuning_args, generating_args, callbacks)\n",
            "  File \"/content/LLaMA-Factory/src/llmtuner/train/sft/workflow.py\", line 68, in run_sft\n",
            "    train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint)\n",
            "  File \"/usr/local/lib/python3.10/dist-packages/transformers/trainer.py\", line 1591, in train\n",
            "    return inner_training_loop(\n",
            "  File \"/usr/local/lib/python3.10/dist-packages/transformers/trainer.py\", line 1892, in _inner_training_loop\n",
            "    tr_loss_step = self.training_step(model, inputs)\n",
            "  File \"/usr/local/lib/python3.10/dist-packages/transformers/trainer.py\", line 2776, in training_step\n",
            "    loss = self.compute_loss(model, inputs)\n",
            "  File \"/usr/local/lib/python3.10/dist-packages/transformers/trainer.py\", line 2801, in compute_loss\n",
            "    outputs = model(**inputs)\n",
            "  File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n",
            "    return self._call_impl(*args, **kwargs)\n",
            "  File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n",
            "    return forward_call(*args, **kwargs)\n",
            "  File \"/usr/local/lib/python3.10/dist-packages/accelerate/utils/operations.py\", line 680, in forward\n",
            "    return model_forward(*args, **kwargs)\n",
            "  File \"/usr/local/lib/python3.10/dist-packages/accelerate/utils/operations.py\", line 668, in __call__\n",
            "    return convert_to_fp32(self.model_forward(*args, **kwargs))\n",
            "  File \"/usr/local/lib/python3.10/dist-packages/torch/amp/autocast_mode.py\", line 15, in decorate_autocast\n",
            "    with autocast_instance:\n",
            "  File \"/usr/local/lib/python3.10/dist-packages/torch/amp/autocast_mode.py\", line 406, in __exit__\n",
            "    torch.clear_autocast_cache()\n",
            "KeyboardInterrupt\n",
            "  4% 605/15591 [07:50<3:14:11,  1.29it/s]\n"
          ]
        }
      ]
    }
  ]
}